library(dplyr)

# The string below is a placeholder: replace it with the folder that holds the
# CSV files, because every read.csv() call in this script uses a relative path.
setwd("Your working directory")

# 1. Raw Data
# Read the raw data file from the working directory. Empty cells are read as
# NA (na.strings = ""). TRUE is spelled out because the alias T can be
# reassigned.
PFP <- read.csv("Raw_Excel_File.csv",
                header = TRUE, sep = ",", na.strings = "")


# 2. Anonymization
# 2.1 Prolific Information
# Read the two Prolific files (one per recruitment sample). Empty cells are
# read as NA. TRUE is spelled out because the alias T can be reassigned.
Prolific_white <- read.csv("Prolific_white.csv",
                           header = TRUE, sep = ",", na.strings = "")
Prolific_minority <- read.csv("Prolific_minority.csv",
                              header = TRUE, sep = ",", na.strings = "")

# Stack both files (rbind requires them to share the same columns) and rename
# the key columns so they match the names used in PFP.
Prolific <- rbind(Prolific_white, Prolific_minority)
Prolific <- rename(Prolific,
                   PROLIFIC_PID = participant_id,
                   SESSION_ID = session_id)

# Left join: keep every row of PFP and attach the Prolific columns where both
# PROLIFIC_PID and SESSION_ID match.
# NOTE(review): PFP_Prolific is not referenced again in the steps that follow
# in this script (they start from PFP) -- confirm whether that is intended.
PFP_Prolific <- merge(x = PFP, y = Prolific,
                      by = c("PROLIFIC_PID", "SESSION_ID"), all.x = TRUE)

# 2.2 Delete incomplete cases
# Keep only the rows of PFP whose Finished value equals "TRUE"; subset() drops
# rows where the comparison is FALSE or NA.
# 58 unfinished observations are excluded from the analytic sample.
PFP.Com <- subset(PFP, Finished == "TRUE")

# Time Check
# Convert the completion time of each finished response from seconds to
# minutes, then print its distribution.
# Author's note from the original run: at least more than 1 minute.
Time_check <- mutate(PFP.Com, Duration_minutes = Duration..in.seconds. / 60)
summary(Time_check[["Duration_minutes"]])

# Attention Check (Disagree, 7 respondents failed)
# Tabulate the answers to item PSM_6 among the finished cases; exclude=NULL
# keeps missing values in the table so non-responses are counted as well.
# NOTE(review): how PSM_6 answers map to pass/fail is not visible in this
# script; the count above is the author's recorded result -- confirm it
# against the questionnaire.
table(PFP.Com$PSM_6, exclude=NULL)

# Duplication Check
# Count how many finished responses each ProlificID contributed; an n greater
# than 1 would point to a repeated submission.
# Author's note from the original run: no duplication.
Duplicated_ID <- summarise(group_by(PFP.Com, ProlificID), n = n())

# 2.3 Assigning ID and Deleting Prolific ID
# Give every finished response a sequential ID (stored as a factor), label the
# recruitment sample from STUDY_ID, drop the three Prolific identifier
# columns, move the key columns to the front, and save the result to disk.
PFP.Anonymized <- PFP.Com %>%
  mutate(
    ID = factor(row_number()),
    Study.ID = ifelse(
      STUDY_ID == "606c63aa1b513c4d7a106551", "White", "Non-White"
    )
  ) %>%
  dplyr::select(-c(ProlificID, PROLIFIC_PID, SESSION_ID)) %>%
  dplyr::select(ID, ResponseId, STUDY_ID, Study.ID, everything())
save(PFP.Anonymized, file = "PFP.Anonymized.rda")
